### R script for Fish 444 pop structure lab ### Charlie Waters 1/30/2014 ######################################### # 1. Set up R session ######################################### #B. Load the installed packages in to R for use #Again, hit Ctrl+Enter for each line separately #Or use R studio panel library(ape) library(ade4) library(adegenet) library(diveRsity) library(doParallel) library(foreach) library(genetics) library(hierfstat) library(iterators) library(parallel) library(sendplot) library(xlsx) library(BDgraph) # C. Set the working directory to a specific folder #just change my UWNetID to yours #and change the folder to the one you created on the Desktop setwd("C:/Users/Christine Savolainen/Desktop/Bio Informatics/Con Gen") #D. import a Genepop file and saves it as "cod_data_genepop" #in the form of a "genind" object salmon <- read.genepop("Class_data_genepop_new.gen", missing=NA) #E. Gives a summary of the data, #including number of individuals, alleles per locus, etc. summary(salmon) ###################################################################################################### # CODE FOR PART A - TEST WHETHER THE POPULATIONS DEVIATE FROM HARDY-WEINBERG EQUILIBRIUM ###################################################################################################### ### Break up the entire data set into genind objects for each population pop_labels <- c(rep("F1998",50),rep("I2002",50), rep("S2002",50),rep("I2006",50),rep("S2006",50),rep("I2010",50), rep("S2010",50) ) ## Creates a vector containing the population assignments of each individual #numbers in code are numbers of individuals in each population ## Creates a list of genind objects for each population pops_separated <- seppop(salmon,pop=pop_labels) names(pops_separated) #Creates a genind object comprising only the AD individuals data_Founders1998 <-pops_separated$Founders1998 #Verify that the genind object has the correct number of individuals and loci data_Founders1998 ###Repeat for all populations data_Int2002 <-pops_separated$Int2002 data_Seg2002 <-pops_separated$Seg2002 data_Int2006 <-pops_separated$Int2006 data_Seg2006 <-pops_separated$Seg2006 data_Int2010 <-pops_separated$Int2010 data_Seg2010 <-pops_separated$Seg2010 #### Compute observed and expected heterzygosity for #each population over all loci summary_Founders1998 <- summary(data_Founders1998) mean(summary_Founders1998$Hexp) mean(summary_Founders1998$Hobs) ### Test whether Hobs and Hexp are significantly different t.test(summary_Founders1998$Hobs,summary_Founders1998$Hexp,paired=TRUE) summary_Int2002 <- summary(data_Int2002) mean(summary_Int2002$Hexp) mean(summary_Int2002$Hobs) t.test(summary_Int2002$Hobs,summary_Int2002$Hexp,paired=TRUE) summary_Seg2002 <- summary(data_Seg2002) mean(summary_Seg2002$Hexp) mean(summary_Seg2002$Hobs) t.test(summary_Seg2002$Hobs,summary_Seg2002$Hexp,paired=TRUE) summary_Int2006 <- summary(data_Int2006) mean(summary_Int2006$Hexp) mean(summary_Int2006$Hobs) t.test(summary_Int2006$Hobs,summary_Int2006$Hexp,paired=TRUE) summary_Seg2006 <- summary(data_Seg2006) mean(summary_Seg2006$Hexp) mean(summary_Seg2006$Hobs) t.test(summary_Seg2006$Hobs,summary_Seg2006$Hexp,paired=TRUE) summary_Int2010 <- summary(data_Int2010) mean(summary_Int2010$Hexp) mean(summary_Int2010$Hobs) t.test(summary_Int2010$Hobs,summary_Int2010$Hexp,paired=TRUE) summary_Seg2010 <- summary(data_Seg2010) mean(summary_Seg2010$Hexp) mean(summary_Seg2010$Hobs) t.test(summary_Seg2010$Hobs,summary_Seg2010$Hexp,paired=TRUE) ################################################################################################################################## ### QUESTION: ### ARE THERE ANY POPULATIONS THAT SIGNIFICANTLY DEVIATE FROM HWE? IF SO, WHICH ONES? ################################################################################################################################## ### Now test for deviations from HWE at each locus within each population #This test uses simulation to compute a pvalue for HWE HWE_test_results <- HWE.test(salmon,pop=NULL,permut=TRUE, nsim=10000,res.type="matrix") #We want to correct for multiple tests, even though the Bonferroni is conservative corrected_pval <- 0.05/(11*11) corrected_pval #Creates a table of True/False for loci that are out of HWE (TRUE=out of HWE) HWE_logical <- HWE_test_results